1 Plot One Variable - X: Continuous or Discrete

For one continuous variable = Numeric:

  • geom_area()
  • geom_density()
  • geom_histogram()
  • geom_freqpoly()
  • geom_dotplot()
  • stat_ecdf()
  • stat_qq()

For one discrete variable = Factor:

  • geom_bar()
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1.1 Area Plots

alpha, color, fill, linetype, size

set.seed(1234)
# Simulated weights: 200 "F" rows drawn with mean 55 and 200 "M" rows drawn
# with mean 58 (rnorm's default sd). as_tibble() replaces the deprecated
# as_data_frame().
wdata <- as_tibble(data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
))
wdata
## # A tibble: 400 × 2
##       sex   weight
##    <fctr>    <dbl>
## 1       F 53.79293
## 2       F 55.27743
## 3       F 56.08444
## 4       F 52.65430
## 5       F 55.42912
## 6       F 55.50606
## 7       F 54.42526
## 8       F 54.45337
## 9       F 54.43555
## 10      F 54.10996
## # ... with 390 more rows
# Mean weight per sex; used later to draw one mean line per group
mu <- wdata %>%
  group_by(sex) %>%
  summarize(grp.mean = mean(weight))
mu
## # A tibble: 2 × 2
##      sex grp.mean
##   <fctr>    <dbl>
## 1      F 54.94224
## 2      M 58.07325
# Base plot reused by the one-variable examples: weight on the x axis
a <- ggplot(data = wdata, mapping = aes(x = weight))

# A bare a + geom_area() fails with "object 'y' not found"; binning the data
# with stat = "bin" supplies the count as y.
a +
  geom_area(stat = "bin", color = "black", fill = "#00AFBB")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# By default the y axis is the count of weight values. Map y to ..density..
# to show the density on the y axis instead.
a +
  geom_area(mapping = aes(y = ..density..), stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Load the diamonds data set and make sure it is a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data("diamonds")
diamonds <- as_tibble(diamonds)
diamonds
## # A tibble: 53,940 × 10
##    carat       cut color clarity depth table price     x     y     z
##    <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1   0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
## 2   0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
## 3   0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
## 4   0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
## 5   0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
## 6   0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
## 7   0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
## 8   0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
## 9   0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
## # ... with 53,930 more rows
# Price on the x axis, filled by cut
p <- ggplot(data = diamonds, mapping = aes(x = price, fill = cut))

# Binned bar plot
p +
  geom_bar(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Binned area plot
p +
  geom_area(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

1.2 Density Plots

alpha, color, fill, linetype, size

  • scale_color_manual(), scale_fill_manual()
  • scale_color_brewer(), scale_fill_brewer() (RColorBrewer palettes)
  • scale_color_grey(), scale_fill_grey()
# Basic density curve
a +
  geom_density()

# Filled density with the overall mean (dashed) and median (linetype 4) marked
a +
  geom_density(color = "black", fill = "gray") +
  geom_vline(
    mapping = aes(xintercept = mean(weight)),
    color = "#FC4E08", linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = median(weight)),
    color = "blue", linetype = 4, size = 1
  )

# Fill by sex
a +
  geom_density(mapping = aes(fill = sex), alpha = 0.4)

# Fill by sex and add each group's mean (from mu) as a dashed line
a +
  geom_density(mapping = aes(fill = sex), alpha = 0.4) +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  )

# Manual scales
# Outline colored by sex, with group mean lines
a2 <- a +
  geom_density(mapping = aes(color = sex)) +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  theme_minimal()

a2 +
  scale_color_manual(values = c("#999999", "#E69F00"))

a2 +
  scale_color_brewer(palette = "Paired")

a2 +
  scale_color_grey()

# Fill colored by sex
a3 <- a +
  geom_density(mapping = aes(fill = sex), alpha = 0.4) +
  theme_minimal()

a3 +
  scale_fill_manual(values = c("#999999", "#E69F00"))

a3 +
  scale_fill_brewer(palette = "Dark2")

a3 +
  scale_fill_grey()

1.3 Histogram Plots

Position adjustments: identity (position_identity()), stack (position_stack()), dodge (position_dodge()). The default value is “stack”.

alpha, color, fill, linetype, size

# Basic histogram
a +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# stat_bin uses 30 bins by default, which might not be a good default.
# Change the number of bins (e.g. bins = 50) or the bin width
# (e.g. binwidth = 0.5).
a +
  geom_histogram(bins = 50)

# Grey bars with the overall mean marked by a dashed line
a +
  geom_histogram(bins = 50, color = "black", fill = "grey") +
  geom_vline(
    mapping = aes(xintercept = mean(weight)),
    color = "#FC4E07", linetype = "dashed", size = 1
  ) +
  theme_minimal()

# Density instead of count on the y axis
a +
  geom_histogram(mapping = aes(y = ..density..), bins = 50)

# Change outline color by sex
a +
  geom_histogram(mapping = aes(color = sex), fill = "white", bins = 50) +
  theme_minimal()

# Position adjustment "identity" (overlaid)
a +
  geom_histogram(
    mapping = aes(color = sex),
    fill = "white", bins = 50, alpha = 0.6, position = "identity"
  )

# Position adjustment "dodge" (interleaved)
# Add one dashed mean line per sex, colored to match its bars.
# The per-sex means come from `mu`; mapping xintercept to mean(weight) on the
# full data would only mark the single overall mean.
a +
  geom_histogram(
    aes(color = sex),
    fill = "white", alpha = 0.6, position = "dodge", bins = 50
  ) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  )

# Manual fill and outline colors
# Outline only: one color per sex, constant white fill
a +
  geom_histogram(
    mapping = aes(color = sex),
    fill = "white", alpha = 0.4, position = "identity", bins = 50
  ) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

# Outline and fill
# Adding scale_fill_manual() to the call above (constant fill = "white") is
# the wrong command: fill has to be assigned by group inside aes() first.
a +
  geom_histogram(
    mapping = aes(color = sex, fill = sex),
    alpha = 0.4, position = "identity", bins = 50
  ) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

## Combine Histogram and Density Plots

# Put density values (instead of counts) on the histogram's y axis, then
# overlay a semi-transparent density plot.

# Histogram with density plot
a +
  geom_histogram(
    mapping = aes(y = ..density..),
    color = "black", fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Color by groups
a +
  geom_histogram(
    mapping = aes(y = ..density.., color = sex, fill = sex),
    alpha = 0.4, position = "identity"
  ) +
  geom_density(mapping = aes(color = sex), size = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

1.4 Frequency Polygon

Very close to histogram plots

  • Histograms use bars
  • Frequency polygons use lines.

alpha, color, linetype, size

# Basic frequency polygon
a +
  geom_freqpoly(bins = 30) +
  theme_minimal()

# Color and line type by sex, with a custom color palette
a +
  geom_freqpoly(mapping = aes(color = sex, linetype = sex), bins = 30) +
  scale_color_manual(values = c("#999999", "#E69F00")) +
  theme_minimal()

# Same plot with density on the y axis
a +
  geom_freqpoly(
    mapping = aes(y = ..density.., color = sex, linetype = sex),
    bins = 30
  ) +
  scale_color_manual(values = c("#999999", "#E69F00")) +
  theme_minimal()

1.5 Dot Plots for One Variable

Not suitable for one variable, it’s ugly.

# Dot plot of weight, dots filled by sex
a +
  geom_dotplot(mapping = aes(fill = sex))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

1.6 ECDF Plots

Empirical Cumulative Distribution Function

alpha, color, linetype, size

# ECDF drawn as points
a +
  stat_ecdf(geom = "point")

# ECDF drawn as a step function
a +
  stat_ecdf(geom = "step")

1.7 QQ Plots

Quantile-Quantile plots are used to check whether given data follow a normal distribution.

alpha, color, shape, size

# Load mtcars and convert it to a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data(mtcars)
mtcars <- as_tibble(mtcars)
mtcars
## # A tibble: 32 × 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
## *  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
## 2   21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4
## 3   22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1
## 4   21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 5   18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
## 6   18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
## 7   14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
## 8   24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 9   22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
## 10  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4
## # ... with 22 more rows
# Turn cyl into a factor so it can be used as a grouping variable
mtcars <- mtcars %>%
  mutate(cyl = as.factor(cyl))
mtcars
## # A tibble: 32 × 11
##      mpg    cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   21.0      6 160.0   110  3.90 2.620 16.46     0     1     4     4
## 2   21.0      6 160.0   110  3.90 2.875 17.02     0     1     4     4
## 3   22.8      4 108.0    93  3.85 2.320 18.61     1     1     4     1
## 4   21.4      6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 5   18.7      8 360.0   175  3.15 3.440 17.02     0     0     3     2
## 6   18.1      6 225.0   105  2.76 3.460 20.22     1     0     3     1
## 7   14.3      8 360.0   245  3.21 3.570 15.84     0     0     3     4
## 8   24.4      4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 9   22.8      4 140.8    95  3.92 3.150 22.90     1     0     4     2
## 10  19.2      6 167.6   123  3.92 3.440 18.30     1     0     4     4
## # ... with 22 more rows
# QQ plot base: mpg is the sample
p <- ggplot(data = mtcars, mapping = aes(sample = mpg))

# Basic plot
p +
  stat_qq()

# Point shape and color by cyl, with a custom color palette
p +
  stat_qq(mapping = aes(shape = cyl, color = cyl)) +
  scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))

1.8 Bar Plots of Counts

For one discrete variable

alpha, color, fill, linetype, size

# Load the mpg data set and make sure it is a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data(mpg)
mpg <- as_tibble(mpg)
mpg
## # A tibble: 234 × 11
##    manufacturer      model displ  year   cyl      trans   drv   cty   hwy
##           <chr>      <chr> <dbl> <int> <int>      <chr> <chr> <int> <int>
## 1          audi         a4   1.8  1999     4   auto(l5)     f    18    29
## 2          audi         a4   1.8  1999     4 manual(m5)     f    21    29
## 3          audi         a4   2.0  2008     4 manual(m6)     f    20    31
## 4          audi         a4   2.0  2008     4   auto(av)     f    21    30
## 5          audi         a4   2.8  1999     6   auto(l5)     f    16    26
## 6          audi         a4   2.8  1999     6 manual(m5)     f    18    26
## 7          audi         a4   3.1  2008     6   auto(av)     f    18    27
## 8          audi a4 quattro   1.8  1999     4 manual(m5)     4    18    26
## 9          audi a4 quattro   1.8  1999     4   auto(l5)     4    16    25
## 10         audi a4 quattro   2.0  2008     4 manual(m6)     4    20    28
## # ... with 224 more rows, and 2 more variables: fl <chr>, class <chr>
# Bar plot of the number of rows for each value of fl
ggplot(data = mpg, mapping = aes(x = fl)) +
  geom_bar(fill = "steelblue") +
  theme_minimal()

2 Plot Two Variables -X & Y: Both Continuous or Discrete

2.1 Scatter plots: Continuous X and Y

  • geom_point()
  • geom_smooth()
  • geom_quantile()
  • geom_rug()
  • geom_jitter()
  • geom_text()

geom_point
alpha, color, fill, shape, size

# Data format
mtcars
## # A tibble: 32 × 11
##      mpg    cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   21.0      6 160.0   110  3.90 2.620 16.46     0     1     4     4
## 2   21.0      6 160.0   110  3.90 2.875 17.02     0     1     4     4
## 3   22.8      4 108.0    93  3.85 2.320 18.61     1     1     4     1
## 4   21.4      6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 5   18.7      8 360.0   175  3.15 3.440 17.02     0     0     3     2
## 6   18.1      6 225.0   105  2.76 3.460 20.22     1     0     3     1
## 7   14.3      8 360.0   245  3.21 3.570 15.84     0     0     3     4
## 8   24.4      4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 9   22.8      4 140.8    95  3.92 3.150 22.90     1     0     4     2
## 10  19.2      6 167.6   123  3.92 3.440 18.30     1     0     4     4
## # ... with 22 more rows
# Scatter plot base: x is weight (wt), y is miles/gallon (mpg)
b <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))

# Basic scatter plot
b +
  geom_point(color = "#00AFBB")

# Change the point size and shape
b +
  geom_point(color = "#00AFBB", size = 2, shape = 23)

# Control point size by a continuous variable (qsec, the 1/4 mile time)
b +
  geom_point(mapping = aes(size = qsec), color = "#00AFBB")

# Label each point with its car model.
# Converting mtcars to a tibble and running mutate() discards the row names,
# so rownames(mtcars) would only give "1".."32". Read the model names from the
# untouched data set in the datasets package instead; the row order is the same.
b +
  geom_point() +
  geom_text(label = rownames(datasets::mtcars), nudge_y = 0.8)

# Automatic shape, color and size
# Point shape by the level of cyl
b +
  geom_point(mapping = aes(shape = cyl))

# Point shape and color by cyl
b +
  geom_point(mapping = aes(color = cyl, shape = cyl))

# Manual shape, color and size
# Point sizes set manually
b +
  geom_point(mapping = aes(color = cyl, shape = cyl, size = cyl)) +
  scale_size_manual(values = c(2, 3, 4))

# Point shapes and colors set manually
b +
  geom_point(mapping = aes(color = cyl, shape = cyl)) +
  scale_shape_manual(values = c(3, 16, 17)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))

# Brewer color palette
b +
  geom_point(mapping = aes(color = cyl, shape = cyl)) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

# Grey scale
b +
  geom_point(mapping = aes(color = cyl, shape = cyl)) +
  scale_color_grey() +
  theme_minimal()

######################################################
## Add regression line or smoothed conditional mean ##
######################################################
# geom_smooth(), geom_abline()
# alpha, color, fill, shape, linetype, size
# geom_smooth(method = "auto")
# method: loess -> local regression, lm -> linear regression

# Points plus a linear regression line
b +
  geom_point() +
  geom_smooth(method = lm)

# Points plus a regression line, without the confidence interval
b +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)

# loess method, local regression fitting
b +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'

# Change the color and shape by groups
b +
  geom_point(mapping = aes(color = cyl, shape = cyl)) +
  geom_smooth(mapping = aes(color = cyl, fill = cyl), method = lm)

# Remove the confidence intervals
# Extend the regression lines: fullrange
b +
  geom_point(mapping = aes(color = cyl, shape = cyl)) +
  geom_smooth(
    mapping = aes(color = cyl),
    method = lm, se = FALSE, fullrange = TRUE
  )

# Marginal rugs on a scatter plot
# geom_rug(sides = "bl")
# sides: a string made of "trbl" (top, right, bottom, left)
b +
  geom_point() +
  geom_rug()

# Change the color by group
b +
  geom_point(mapping = aes(color = cyl)) +
  geom_rug(mapping = aes(color = cyl))

# Add marginal rugs using faithful data
# Load faithful and convert it to a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data(faithful)
faithful <- as_tibble(faithful)
faithful
## # A tibble: 272 × 2
##    eruptions waiting
## *      <dbl>   <dbl>
## 1      3.600      79
## 2      1.800      54
## 3      3.333      74
## 4      2.283      62
## 5      4.533      85
## 6      2.883      55
## 7      4.700      88
## 8      3.600      85
## 9      1.950      51
## 10     4.350      85
## # ... with 262 more rows
# Scatter plot of eruptions against waiting, with marginal rugs
ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting)) +
  geom_point() +
  geom_rug()

# Jitter points to reduce overplotting
# geom_jitter(), position_jitter()
# alpha, color, fill, shape, size

# Use mpg data: displ on x, hwy on y
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy))

# Default scatter plot
p +
  geom_point()

# Use jitter to reduce overplotting
p +
  geom_jitter(position = position_jitter(width = 0.5, height = 0.5))

# displ/hwy pairs for displ == 1.9, sorted by decreasing hwy
mpg %>%
  select(displ, hwy) %>%
  arrange(-hwy) %>%
  filter(displ == 1.9)
## # A tibble: 3 × 2
##   displ   hwy
##   <dbl> <int>
## 1   1.9    44
## 2   1.9    44
## 3   1.9    41
##
# Text annotation
# geom_text()
# label, alpha, angle, color, family, fontface, hjust, lineheight, size, vjust

# Draw the car model names at the data positions.
# The mtcars tibble has lost its row names (tibble conversion and mutate()
# discard them), so take the names from the original datasets::mtcars, whose
# rows are in the same order.
b +
  geom_text(aes(label = rownames(datasets::mtcars)), size = 3)

2.2 Continuous bivariate distribution

  • geom_bin2d()
  • geom_hex()
  • geom_density_2d()
# Base plot for the bivariate examples: carat on x, price on y
c <- ggplot(data = diamonds, mapping = aes(x = carat, y = price))

# Heatmap of 2d bin counts
# geom_bin2d() produces a scatter plot with rectangular bins.
# stat_bin_2d(), stat_summary_2d()
# xmax, xmin, ymax, ymin, alpha, color, fill, linetype, size
c +
  geom_bin2d()

# Change the number of bins
c +
  geom_bin2d(bins = 15)

# Specify the width of bins
c +
  geom_bin2d(binwidth = c(1, 1000))

c +
  stat_bin_2d()

c +
  stat_summary_2d(mapping = aes(z = depth))

# Add hexagon binning
# geom_hex()
# stat_bin_hex(), stat_summary_hex()
# alpha, color, fill, size
# Load hexbin with library() rather than require(): library() stops with an
# error if the package is missing, while require() only warns and returns FALSE.
library(hexbin)
c + geom_hex()

# Change the number of bins
c + geom_hex(bins = 10)

c + stat_bin_hex()

c + stat_summary_hex(aes(z = depth))

# 2D density estimation
# geom_density_2d()
# stat_density_2d()
# alpha, color, linetype, size

# Scatter plot base: eruptions on x, waiting on y
sp <- ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting))
select(faithful, eruptions, waiting)
## # A tibble: 272 × 2
##    eruptions waiting
## *      <dbl>   <dbl>
## 1      3.600      79
## 2      1.800      54
## 3      3.333      74
## 4      2.283      62
## 5      4.533      85
## 6      2.883      55
## 7      4.700      88
## 8      3.600      85
## 9      1.950      51
## 10     4.350      85
## # ... with 262 more rows
# Default plot: density contours only
sp +
  geom_density_2d(color = "#E7B800")

# Add points
sp +
  geom_point(color = "#00AFBB") +
  geom_density_2d(color = "#E7B800")

# Use stat_density_2d with geom = "polygon", filled by contour level
sp +
  geom_point() +
  stat_density_2d(mapping = aes(fill = ..level..), geom = "polygon")

# Change the gradient color
sp +
  geom_point() +
  stat_density_2d(mapping = aes(fill = ..level..), geom = "polygon") +
  scale_fill_gradient(low = "#00AFBB", high = "#FC3E07")

# Gradient

2.3 Two variables: Discrete X, Discrete Y

geom_jitter
alpha, color, fill, shape, size

# Jittered points for two discrete variables (cut on x, color on y),
# colored by cut
ggplot(data = diamonds, mapping = aes(x = cut, y = color)) +
  geom_jitter(mapping = aes(color = cut), size = 0.5)

select(diamonds, cut, color)
## # A tibble: 53,940 × 2
##          cut color
##        <ord> <ord>
## 1      Ideal     E
## 2    Premium     E
## 3       Good     E
## 4    Premium     I
## 5       Good     J
## 6  Very Good     J
## 7  Very Good     I
## 8  Very Good     H
## 9       Fair     E
## 10 Very Good     H
## # ... with 53,930 more rows

3 Plot Two Variables - X & Y: Discrete X and Continuous Y

  • geom_boxplot()
  • geom_violin()
  • geom_dotplot()
  • geom_jitter()
  • geom_line()
  • geom_bar()
# Load ToothGrowth, treat dose as a factor (discrete x axis), and convert to a
# tibble. as_tibble() replaces the deprecated as_data_frame().
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
ToothGrowth <- as_tibble(ToothGrowth)
ToothGrowth
## # A tibble: 60 × 3
##      len   supp   dose
##    <dbl> <fctr> <fctr>
## 1    4.2     VC    0.5
## 2   11.5     VC    0.5
## 3    7.3     VC    0.5
## 4    5.8     VC    0.5
## 5    6.4     VC    0.5
## 6   10.0     VC    0.5
## 7   11.2     VC    0.5
## 8   11.2     VC    0.5
## 9    5.2     VC    0.5
## 10   7.0     VC    0.5
## # ... with 50 more rows
# Base plot for section 3: dose (discrete) on x, len (continuous) on y
e <- ggplot(data = ToothGrowth, mapping = aes(x = dose, y = len))

3.1 Box Plots

alpha, color, linetype, shape, size, fill
geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, notch = FALSE)

# Basic box plot
e +
  geom_boxplot()

# Rotate the box plot
e +
  geom_boxplot() +
  coord_flip()

# Notched box plot
e +
  geom_boxplot(notch = TRUE)

# Box plot with mean points
e +
  geom_boxplot() +
  stat_summary(
    fun.y = mean, geom = "point",
    shape = 18, size = 4, color = "blue"
  )

# Choose which items to display
e +
  geom_boxplot() +
  scale_x_discrete(limits = c("0.5", "2"))
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).

# Change the default order of items
e +
  geom_boxplot() +
  scale_x_discrete(limits = c("2", "0.5", "1"))

# Whisker length as a multiple of the IQR. The argument is spelled `coef`;
# the misspelled `coeff` was ignored with an "unknown parameters" warning.
e + stat_boxplot(coef = 1.5)

# Change the color by group
# Box plot outline and fill colors can be controlled automatically by the
# levels of the grouping variable *dose*.

# Use a single color
e +
  geom_boxplot(color = "black", fill = "steelblue")

# Outline colors by dose (groups)
e +
  geom_boxplot(mapping = aes(color = dose))

# Fill color by dose (groups)
e +
  geom_boxplot(mapping = aes(fill = dose))

# Change outline colors manually:
# Use custom color palettes
e2 <- e +
  geom_boxplot(mapping = aes(color = dose)) +
  theme_minimal()
e2 +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))

# Use brewer color palettes
e2 +
  scale_color_brewer(palette = "Dark2")

# Use grey scale
e2 +
  scale_color_grey()

## Change fill colors manually
# Use custom color palettes
e3 <- e +
  geom_boxplot(mapping = aes(fill = dose)) +
  theme_minimal()
e3 +
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9"))

# Use brewer color palettes
e3 +
  scale_fill_brewer(palette = "Dark2")

# Use grey scale
e3 +
  scale_fill_grey()

## Box plot with multiple groups
# The grouping variables *dose* and *supp* are used:

# Change box plot colors by groups
e +
  geom_boxplot(mapping = aes(fill = supp))

# Change the position
e +
  geom_boxplot(mapping = aes(fill = supp), position = position_dodge(1.1))

# Change the fill color
# The palette has to be passed by name: an unnamed first argument to
# scale_fill_brewer() is taken as the scale (legend) name, so
# scale_fill_brewer("BrBG") only retitled the legend and kept the default
# palette.
e +
  geom_boxplot(aes(fill = supp), position = position_dodge(1.1)) +
  scale_fill_brewer(palette = "BrBG")

3.2 Violin Plots

Violin plots are similar to box plots, except that they also show the kernel probability density of the data at different values. Typically, violin plots include a marker for the median of the data and a box indicating the interquartile range, as in standard box plots.

alpha, color, fill, linetype, and size

# Basic violin plot
e +
  geom_violin()

# Rotate the violin plot
e +
  geom_violin() +
  coord_flip()

# Set the trim argument to FALSE
e +
  geom_violin(trim = FALSE, fill = "steelblue")

## Add summary statistics
# stat_summary() can be used to add mean/median points and more to a violin plot

# Add mean or median points: use fun.y = mean or fun.y = median
e +
  geom_violin(trim = FALSE) +
  stat_summary(
    fun.y = mean, geom = "point",
    shape = 23, size = 2, color = "blue"
  )

# Add mean points +/- SD
# Use geom = "pointrange" or geom = "crossbar"
e +
  geom_violin(trim = FALSE) +
  stat_summary(
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    geom = "pointrange", color = "red"
  )

# mean_sdl adds the mean and standard deviation: it computes the mean plus or
# minus a constant times the standard deviation. The constant is given by the
# argument mult (mult = 1 here; the default is mult = 2).
# The mean +/- SD can be drawn as a crossbar or a pointrange.


# Combine with a box plot to add the median and quartiles
e +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2)

## Change colors by groups
# Color and fill can be controlled automatically by the levels of the
# grouping variable dose

# Outline colors by dose (groups)
e +
  geom_violin(mapping = aes(color = dose), trim = FALSE)

# Fill color by dose
e +
  geom_violin(mapping = aes(fill = dose), trim = FALSE)

# Change outline colors manually
e2 <- e +
  geom_violin(mapping = aes(color = dose), trim = FALSE) +
  theme_minimal()
e2 +
  scale_color_brewer(palette = "Dark2")

# Change fill colors manually
e3 <- e +
  geom_violin(mapping = aes(fill = dose), trim = FALSE) +
  theme_minimal()
e3 +
  scale_fill_brewer(palette = "Dark2")

## Violin plot with multiple groups
# Change the color by groups
e +
  geom_violin(mapping = aes(fill = supp), trim = FALSE)

# Change fill colors
e +
  geom_violin(mapping = aes(fill = supp), trim = FALSE) +
  scale_fill_brewer(palette = "Dark2")

3.3 Dot plots

geom_dotplot(), stat_summary()
alpha, color, dotsize and fill

# Basic dot plot
e +
  geom_dotplot(binaxis = "y", stackdir = "center")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Change dot size and stack ratio
e +
  geom_dotplot(
    binaxis = "y", stackdir = "center",
    stackratio = 1.5, dotsize = 1.1
  )
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# stat_summary() can be used to add mean/median points and more to a dot plot
# Add mean or median points: use fun.y = mean or fun.y = median
e +
  geom_dotplot(binaxis = "y", stackdir = "center") +
  stat_summary(
    fun.y = mean, geom = "point",
    shape = 18, size = 3, color = "red"
  )
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Add mean points with +/- SD
# Use geom = "pointrange" or geom = "crossbar"
e +
  geom_dotplot(binaxis = "y", stackdir = "center") +
  stat_summary(
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    geom = "pointrange", color = "red"
  )
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

## Combine with box plot and violin plot:
# Combine with a box plot
e +
  geom_boxplot() +
  geom_dotplot(binaxis = "y", stackdir = "center")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Combine with a violin plot
e +
  geom_violin(trim = FALSE) +
  geom_dotplot(binaxis = "y", stackdir = "center")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Dot plot + violin plot + stat summary
e +
  geom_violin(trim = FALSE) +
  geom_dotplot(binaxis = "y", stackdir = "center") +
  stat_summary(
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    geom = "pointrange", color = "red", shape = 11
  )
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Use scales to change the outline and fill colors, controlled automatically
# by the levels of the grouping variable dose:
# scale_color_manual(), scale_color_brewer(), scale_color_grey()
# scale_fill_manual(), scale_fill_brewer(), scale_fill_grey()

e +
  geom_dotplot(
    mapping = aes(color = dose),
    binaxis = "y", stackdir = "center", fill = "white"
  ) +
  theme_minimal()
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

## dotplot with multiple groups
# just like boxplot and violin plot

3.4 Stripcharts

Stripcharts are also known as one-dimensional scatter plots. They are a suitable alternative to box plots when sample sizes are small.
geom_jitter(), stat_summary()
alpha, color, size and fill

# Basic stripchart
e +
  geom_jitter()

# Change the position
# 0.2 is the degree of jitter in the x direction
e +
  geom_jitter(position = position_jitter(0.2))

# Change point shape and size, on top of a box plot
e +
  geom_boxplot() +
  geom_jitter(position = position_jitter(0.2), shape = 11, size = 1.2)

# Add summary statistics
# Add mean or median points
e +
  geom_jitter(position = position_jitter(0.2)) +
  stat_summary(
    fun.y = mean, geom = "point",
    shape = 18, size = 3, color = "red"
  )

# Mean +/- SD (geom = "pointrange")
e +
  geom_jitter(position = position_jitter(0.2)) +
  stat_summary(
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    shape = 18, color = "red"
  )

# Combine with a violin plot
e +
  geom_violin(trim = FALSE) +
  geom_jitter(position = position_jitter(0.1)) +
  stat_summary(
    fun.data = "mean_sdl", fun.args = list(mult = 1),
    shape = 18, color = "red"
  )

# Change point shape by group
e +
  geom_jitter(mapping = aes(shape = dose), position = position_jitter(0.2)) +
  scale_shape_manual(values = c(1, 17, 19))

# Change color by groups
e +
  geom_jitter(
    mapping = aes(color = dose, shape = dose),
    position = position_jitter(0.2)
  ) +
  theme_minimal()

# Change the outline and fill color by scale

## Stripchart with multiple groups
# Change colors and shapes by groups
e +
  geom_jitter(
    mapping = aes(color = supp, shape = supp),
    position = position_jitter(0.2)
  )

# Add a box plot
e +
  geom_boxplot(mapping = aes(color = supp), position = position_dodge()) +
  geom_jitter(
    mapping = aes(color = supp, shape = supp),
    position = position_jitter(0.2)
  ) +
  theme_minimal()

3.5 Line plots

In a line graph, observations are ordered by x value and connected.
x value can be:

  • date: for a time series data
  • texts
  • discrete numeric values
  • continuous numeric values

geom_line(), geom_path(), geom_step()

alpha, color, linetype and size

# Example data: one series (df) and two series distinguished by supp (df2)
df <- data.frame(
  dose = c("D0.5", "D1", "D2"),
  len = c(4.2, 10, 29.5)
)
df2 <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)

# Single-group line plot (all rows share group = 1)
p <- ggplot(data = df, mapping = aes(x = dose, y = len, group = 1))
p +
  geom_line() +
  geom_point()

# Change the line color and line type
p +
  geom_line(linetype = "dashed", color = "steelblue") +
  geom_point(color = "steelblue")

# Use geom_step()
p +
  geom_step() +
  geom_point()

# Use geom_path()
p +
  geom_path()

# Line plot with multiple groups
# Line type and point shape are controlled automatically by groups.
p <- ggplot(data = df2, mapping = aes(x = dose, y = len, group = supp))
p +
  geom_line(mapping = aes(linetype = supp)) +
  geom_point(mapping = aes(shape = supp))

# Change the line type, point shapes and colors
p +
  geom_line(mapping = aes(linetype = supp, color = supp)) +
  geom_point(mapping = aes(shape = supp, color = supp)) +
  scale_color_brewer(palette = "Dark2")

# X-axis is date; use economics
head(economics)
## # A tibble: 6 × 6
##         date   pce    pop psavert uempmed unemploy
##       <date> <dbl>  <int>   <dbl>   <dbl>    <int>
## 1 1967-07-01 507.4 198712    12.5     4.5     2944
## 2 1967-08-01 510.5 198911    12.5     4.7     2945
## 3 1967-09-01 516.3 199113    11.7     4.6     2958
## 4 1967-10-01 512.9 199311    12.5     4.9     3143
## 5 1967-11-01 518.1 199498    12.5     4.7     3066
## 6 1967-12-01 525.8 199657    12.1     4.8     3018
# pop over time
ggplot(data = economics, mapping = aes(x = date, y = pop)) +
  geom_line()

# Subset the data: dates after 2006-01-01
ss <- subset(economics, date > as.Date("2006-1-1"))
ggplot(data = ss, mapping = aes(x = date, y = pop)) +
  geom_line()

# Line size mapped to unemploy / pop
ggplot(
  data = economics,
  mapping = aes(x = date, y = pop, size = unemploy / pop)
) +
  geom_line()

# Multiple time series:
# Solution 1: one geom_line() per series, each with its own constant color.
# Constant colors go outside aes(). Inside aes(), "darkred" is treated as a
# data value, so the line gets a default palette color and a legend entry
# labelled "darkred" instead of being drawn in dark red.
ggplot(economics, aes(x = date)) +
  geom_line(aes(y = psavert), color = "darkred") +
  geom_line(aes(y = uempmed), color = "steelblue", linetype = "twodash") +
  theme_minimal()

# Solution 2: melt by date

# Area plot: psavert and uempmed as two semi-transparent areas
ggplot(data = economics, mapping = aes(x = date)) +
  geom_area(
    mapping = aes(y = psavert),
    fill = "#999999", color = "#999999", alpha = 0.5
  ) +
  geom_area(
    mapping = aes(y = uempmed),
    fill = "#E69F00", color = "#E69F00", alpha = 0.5
  ) +
  theme_minimal()

3.6 Bar plots

geom_bar()
alpha, color, fill, linetype and size

# Example data: one series (df) and two series distinguished by supp (df2)
df <- data.frame(
  dose = c("D0.5", "D1", "D2"),
  len = c(4.2, 10, 29.5)
)
df2 <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)

# Bar plot base: bar heights are taken from len (stat = "identity")
f <- ggplot(data = df, mapping = aes(x = dose, y = len))

f +
  geom_bar(stat = "identity")

# Change the fill color and add labels above the bars
f +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(mapping = aes(label = len), vjust = -0.3, size = 3.5) +
  theme_minimal()

# Labels inside the bars, with a custom order of the x items
f +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(
    mapping = aes(label = len),
    vjust = 1.6, size = 3.5, color = "white"
  ) +
  theme_minimal() +
  scale_x_discrete(limits = c("D2", "D0.5", "D1"))

# Change the outline color by groups
f +
  geom_bar(mapping = aes(color = dose), stat = "identity", fill = "white")

# Bar plot with multiple groups
g <- ggplot(data = df2, mapping = aes(x = dose, y = len, fill = supp))

# Stacked bar plot
g +
  geom_bar(stat = "identity")

# Use position = position_dodge()
g +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(
    mapping = aes(label = len),
    vjust = 1.6, color = "white",
    position = position_dodge(0.9), size = 3.5
  )

library(dplyr)
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
# Sort so that, within each dose, rows follow the bottom-to-top order of the
# stacked bars. ggplot2 (>= 2.2.0) stacks groups in reverse order, so the last
# supp level sits at the bottom and must come first in the cumulative sum;
# otherwise the labels land in the wrong segments.
# The dplyr verbs are namespaced because plyr, attached after dplyr, masks
# arrange() and mutate().
df_sorted <- dplyr::arrange(df2, dose, dplyr::desc(supp))
# label_ypos is the running total of len within each dose, i.e. the top edge
# of each stacked segment.
df_cumsum <- df_sorted %>%
  dplyr::group_by(dose) %>%
  dplyr::mutate(label_ypos = cumsum(len)) %>%
  dplyr::ungroup()

# Stacked bar plot with each len label placed at label_ypos
ggplot(data = df_cumsum, mapping = aes(x = dose, y = len, fill = supp)) +
  geom_bar(stat = "identity") +
  geom_text(
    mapping = aes(label = len, y = label_ypos),
    vjust = 1.6, color = "white", size = 3.5
  )